R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document.

INSTALL NECESSARY PACKAGES:

# install.packages("erp.easy")
library(erp.easy)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units

LOCATE FOLDERS:

# Locate the folder for the EEG output files (.txt) for old and new nets, replace the file location below with the one in your local device:
# NOTE(review): load.data() below appears to paste these paths directly onto
# the file names, so the trailing "/" is required — keep it if you edit these.
path_newnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/newnets/"
path_oldnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/oldnets/"
# Enter the number of participants in each group:
# These counts must match the number of exported .txt files per condition in
# each folder; they are passed to every load.data() call below.
subs_new <- 57
subs_old <- 11

LOAD DATA:

# Load data into dataframes for each condition separately (the exported .txt files appear separately for each condition):
# NOTE(review): the -100 and 999 arguments are presumably the epoch start/end
# in ms relative to stimulus onset — confirm against the erp.easy load.data docs.
neg_go <- load.data(path_newnets,"NegGo", subs_new, -100, 999) 
neg_nogo <- load.data(path_newnets,"NegNoGo", subs_new, -100, 999)
neut_go <- load.data(path_newnets,"NeutGo", subs_new, -100, 999)
neut_nogo <- load.data(path_newnets,"NeutNoGo", subs_new, -100, 999)

# Combine all conditions together into a single dataframe:
combo_new <- rbind.data.frame(neg_go, neg_nogo, neut_go, neut_nogo) 
# Convert character columns (e.g. Subject, Trial Type) to factors.
combo_new <- as.data.frame(unclass(combo_new), stringsAsFactors=TRUE)

# Repeat for old nets:
# Same conditions and epoch window as the new-net data above, loaded from the
# old-net export folder with the old-net participant count.
neg_go_old <- load.data(path_oldnets,"NegGo", subs_old, -100, 999) 
neg_nogo_old <- load.data(path_oldnets,"NegNoGo", subs_old, -100, 999)
neut_go_old <- load.data(path_oldnets,"NeutGo", subs_old, -100, 999)
neut_nogo_old <- load.data(path_oldnets,"NeutNoGo", subs_old, -100, 999)

# Combine all conditions together into a single dataframe; convert character
# columns to factors, as for combo_new.
combo_old <- rbind.data.frame(neg_go_old, neg_nogo_old, neut_go_old, neut_nogo_old) 
combo_old <- as.data.frame(unclass(combo_old),stringsAsFactors=TRUE)

SPECIFY THE ELECTRODE NUMBERS FOR P2, N2 and P3:

# We are only interested in frontal P2 (positive) and N2 (negative)
# Posterior P3 and Frontal P3
# The electrode (column) names differ between the two net types, so each
# montage gets its own channel list.
# NOTE(review): p3_oldnets lists 9 channels while p3_newnets lists 8 — confirm
# this asymmetry is intentional for the old-net montage.

p2n2_newnets <- c("V4","V5","V10","V11","V12","V16","V18","V19") 
p2n2_oldnets <- c("V4","V5","V10","V11","V12","V16","V19", "V20")
p3_newnets <- c("V54","V61","V62","V67","V72","V77","V78","V79") 
p3_oldnets <- c("V54","V61","V62","V67","V68","V73","V78","V79","V80") 

#Create average waveform plots for each subject in a single, multiplot window
# (visual QC step for the new-net data; the old-net data is not plotted here)
mosaic(combo_new, p2n2_newnets, cols = 3, rows = 2)

mosaic(combo_new, p3_newnets, cols = 3, rows = 2)

# THE CODE BELOW GETS ALL THE MEASURES (N2, P2, P3) FROM THE OLD AND NEW NET
# DATA, COMBINES THEM TOGETHER, AND SAVES THE DATA INTO A FINAL COMBO
# SPREADSHEET.
# CHECK THE WINDOW RANGE FOR EACH ERP COMPONENT AND ADJUST AS NEEDED!
# * The dependent measures we use for statistical analysis are mean amplitude
#   (microvolts) and latency (in ms).
# * m.measures gives the mean amplitude for a specific window and its std.
#   dev., along with its graph. Specify lgnd = "n" (no legend) if you do not
#   want the legend.
# * p.measures calculates local or simple peak amplitude and latency for each
#   condition in the data frame. Use latency only from p.measures.
# * pol = the polarity of peaks to favor when multiple peaks are present.
#   Entering "pos" will locate the most positive peak, "neg" the most negative
#   peak, and "abs" the greatest deviation from 0, regardless of polarity.

# Get the measures from the NEW net:
# m.measures: mean amplitude (+ standard dev.) within the given window (ms).
# p.measures: peak amplitude and latency within the window; pol selects which
# peak polarity to favor (see the notes above).
MeanAmp_P2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(150,300))) 

MeanAmp_N2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(350,550))) 

MeanAmp_P3_newnets <- (m.measures(combo_new, p3_newnets, window=c(450,750)))  

Latency_P2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(150,300), pol="pos")) 

Latency_N2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(350,550), pol="neg"))

Latency_P3_newnets <- (p.measures(combo_new, p3_newnets, window=c(450,750), pol="pos"))

# Combine all results together
# You can use full_join or merge in the same way:
# The suffix argument only applies where column names collide in that join:
# the first and fourth joins leave the P2 columns unsuffixed until the N2 join
# disambiguates them; the P3 columns come through unsuffixed and are renamed
# explicitly below.
total_new <- MeanAmp_P2_newnets %>%
            full_join(MeanAmp_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
            full_join(MeanAmp_P3_newnets, by = c("Subject", "Trial Type"))  %>%
            full_join(Latency_P2_newnets, by = c("Subject", "Trial Type"))  %>%
            full_join(Latency_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
            full_join(Latency_P3_newnets, by = c("Subject", "Trial Type"))

# rename the variables without any suffix here:
# rename does not work properly unless you specify the package - some conflict:
# NOTE(review): likely a masking conflict with another attached package (the
# load messages above show Hmisc masking several dplyr names) — verify.
total_new <- total_new %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
                                      "Standard Dev.P3" = "Standard Dev",
                                      "Peak Latency.P3" = "Peak Latency",
                                      "Peak Amplitude.P3" = "Peak Amplitude")

# Get the measures from the OLD net:
# Same windows and polarity choices as the new-net measures above, but using
# the old-net data frame and the old-net electrode lists.
MeanAmp_P2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(150,300))) 

MeanAmp_N2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(350,550)))

MeanAmp_P3_oldnets <- (m.measures(combo_old, p3_oldnets, window=c(450,750))) 

Latency_P2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(150,300), pol="pos")) 

Latency_N2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(350,550), pol="neg"))

Latency_P3_oldnets <- (p.measures(combo_old, p3_oldnets, window=c(450,750), pol="pos"))

# Combine all results together
# Same join/suffix pattern as total_new above: P2/N2 columns get suffixed on
# collision, the P3 columns stay unsuffixed and are renamed explicitly below.
total_old <- MeanAmp_P2_oldnets %>%
            full_join(MeanAmp_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
            full_join(MeanAmp_P3_oldnets, by = c("Subject", "Trial Type"))  %>%
            full_join(Latency_P2_oldnets, by = c("Subject", "Trial Type"))  %>%
            full_join(Latency_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
            full_join(Latency_P3_oldnets, by = c("Subject", "Trial Type"))


# rename the variables without any suffix here:
total_old <- total_old %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
                            "Standard Dev.P3" = "Standard Dev",
                            "Peak Latency.P3" = "Peak Latency",
                            "Peak Amplitude.P3" = "Peak Amplitude")

# Combine old and new net data together:
# NOTE(review): full_join() without `by` joins on ALL shared columns, so rows
# only merge when every measure matches exactly — here it effectively stacks
# the two data sets row-wise (confirmed by the row-count check just below).
combo <- full_join(total_new, total_old)
## Joining, by = c("Subject", "Trial Type", "Standard Dev.P2", "Mean
## Amplitude.P2", "Standard Dev.N2", "Mean Amplitude.N2", "Standard Dev.P3", "Mean
## Amplitude.P3", "Peak Latency.P2", "Peak Amplitude.P2", "Peak Latency.N2", "Peak
## Amplitude.N2", "Peak Latency.P3", "Peak Amplitude.P3")
# Making sure we are only adding new rows - participants
nrow(total_new) + nrow(total_old) == nrow(combo)
## [1] TRUE
ncol(total_old) == ncol(total_new) 
## [1] TRUE
ncol(total_new) ==  ncol(combo)
## [1] TRUE
# Remove Grand Ave from data, order by subject name and reset the index:
# (the measure functions emit a "Grand Avg" summary row per condition, which
# must not enter the per-participant analysis)
combo <- combo[!(combo$Subject=="Grand Avg"),]
combo <- with(combo,  combo[order(Subject) , ])
rownames(combo) <- NULL
unique(combo[c("Subject")])
##        Subject
## 1     AE050318
## 5     AH101121
## 9     AK022218
## 13    AK102221
## 17    AL041819
## 21    AL090917
## 25    AN122116
## 29    AO013020
## 33    AS110816
## 37    AT051818
## 41    AW040217
## 45    AW110418
## 49  BW071018T2
## 53  CC102318T2
## 57    CF101019
## 61    CL040218
## 65    CM120919
## 69    DJ052417
## 73    EC041817
## 77    EG030618
## 81    EM100417
## 85    ES031519
## 89    ES032018
## 93    FW121816
## 97    GB012817
## 101   GR071921
## 105   HC102117
## 109   HC111621
## 113   HH061919
## 117   JA092118
## 121 JG091119T3
## 125   JJ011018
## 129 JK032119T3
## 133   JS121321
## 137   JT051618
## 141   KA022017
## 145   KE050718
## 149   KT072319
## 153   LB012619
## 157   LG100721
## 161   LS100617
## 165 LW102219T3
## 169   MM040119
## 173 MR091118T2
## 177   MS102319
## 181   NL041119
## 185   OG013016
## 189   PB021519
## 193   PW030417
## 197   RB101619
## 201   RH100218
## 205 RK040219T3
## 209   RS030518
## 213   RT032219
## 217   SA072518
## 221   SB111121
## 225   SK041519
## 229   SL090418
## 233   SP010219
## 237   ST100121
## 241   TA051917
## 245   TE062818
## 249   TS011518
## 253   WB110221
## 257   WF080417
## 261   WH022219
## 265   WK011122
## 269   WS051018
# CREATE A NEW COLUMN by taking the difference between N2-P2
# Peak-to-peak style measures: the N2 minus P2 difference, computed once from
# the mean amplitudes and once from the peak amplitudes.
combo$`N2P2avg` <- combo$`Mean Amplitude.N2` - combo$`Mean Amplitude.P2`
combo$`N2P2peak` <- combo$`Peak Amplitude.N2` - combo$`Peak Amplitude.P2`

# Write to a csv file:
# (left disabled — uncomment to export the combined measures)
# write.csv(combo, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/combo.csv")
head(combo)
##    Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318      NegGo        3.021852         -6.470334        3.907745
## 2 AE050318    NegNoGo        5.962168          3.531623        2.249580
## 3 AE050318     NeutGo        2.577527        -10.326151        3.458130
## 4 AE050318   NeutNoGo        5.419626        -10.376971        2.866578
## 5 AH101121      NegGo        2.182505         -7.668577        1.576681
## 6 AH101121    NegNoGo        3.187342         -4.029610        1.060874
##   Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1        -10.871784        1.377677          3.505749             220
## 2          2.023796        2.252998          3.432536             256
## 3        -15.687000        1.537678          4.367052             228
## 4         -7.857202        2.118532          7.358546             204
## 5        -24.930473        4.063984         18.547604             232
## 6        -24.144933        4.750914         14.173249             244
##   Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1        -2.7612018             536         -5.187617             648
## 2        11.4069294             452         -2.268911             596
## 3        -7.3050607             376        -20.651664             588
## 4        -4.5040871             452         -7.270952             720
## 5        -5.8547045             492        -26.713190             528
## 6        -0.5727539             476        -25.672664             508
##   Peak Amplitude.P3    N2P2avg   N2P2peak
## 1          5.088120  -4.401450  -2.426415
## 2          6.870649  -1.507826 -13.675841
## 3          7.176728  -5.360848 -13.346603
## 4         10.265959   2.519769  -2.766865
## 5         23.899982 -17.261896 -20.858486
## 6         18.808306 -20.115323 -25.099910

MERGE WITH INTAKE INFO:

# Load DataSet: 
intake <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/CognitiveEmotionalLi-IntakeStutteringLang.csv')

# Subject IDs include the visit number in the combo dataset if it is NOT the first time point. 
# Do the same here: Combine visit number with subject and create a new Subject variable so that it matches the combo:
# e.g. a visit-3 participant "LW102219" becomes "LW102219T3"; first-visit IDs
# are left unchanged.
intake <- intake  %>%
  mutate(Subject = ifelse(visitnumber != 1, paste0(part_id_status, "T", visitnumber), part_id_status)) 

# Calculate the month difference using BIRTHDATE and CVE date to make sure the autocalculator is correct:
# Install and load the lubridate package
# install.packages("lubridate")

# library(lubridate)
# the %/% operator is used to perform the floor division of the interval by months(1), which represents one month.
# intake$month_diff <- interval(intake$date_birth, intake$cve_date) %/% months(1)
# print(intake$month_diff)
# print(round(intake$calculator_age_cve))
# Not a big difference between the hand calculated and auto calculated field. Let's use "calculator_age_cve"

# Create a new variable representing final sldper100words ("disfluency_sldper100words_final) by taking disfluency_sldper100words from CVD as primary, but in the case that this data is missing, take the disfluency scores from CVE:
intake <- intake  %>%
  mutate(disfluency_sldper100words_final = ifelse(!is.na(disfluency_sldper100words), disfluency_sldper100words, disfluency_sldper100words_cve)) 

# Create a final talker group variable ("talkergroup_final) using disfluency_sldper100words_final and talker group based on parent report:
# 1: CWS, 0:CWNS, 9:unidentified
# NOTE(review): when disfluency_sldper100words_final or
# calculator_talkergroup_parent is NA, ifelse() propagates NA instead of
# assigning 9, so talkergroup_final can come out NA — both the 9 and the NA
# cases are caught and resolved manually further below.
intake <- intake  %>%
  mutate(talkergroup_final = ifelse((disfluency_sldper100words_final >= 3 | calculator_talkergroup_parent == 1), 1,
                                          ifelse((disfluency_sldper100words_final < 3 & calculator_talkergroup_parent == 0), 0, 9)))  


# ignoring the NA's:
# intake <- intake  %>%                                           
# mutate(talkergroup_overall_final = ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == 1) | 
#                                          (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 1), 1,
#                                         ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == & 
#                                               (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 0), 0, 9)))


# Take the relevant columns from intake dataset
# You may update this to take more columns into the dataset:
intake <-  subset(intake, select=c('Subject','calculator_age_cve','calculator_gender_cve',
                                   'calculator_talkergroup_parent','tso_calculated', 'disfluency_sldper100words','ssi_total', 
                                   'disfluency_sldper100words_final', 'talkergroup_final',
                                   "gfta_standard", "ppvt_standard", "evt_standard",             
                                   "teld_rec_standard","teld_exp_standard", "teld_spokenlang_standard",
                                   'cve_comments','comments_tasks'))

# Merge with the main dataset using SUBJECT
# NOTE(review): merge() defaults to an inner join (all = FALSE), so ERP rows
# whose Subject has no intake record are silently dropped — confirm intended.
FULL <- merge(combo, intake, by=c("Subject"))
head(FULL)  
##    Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318    NegNoGo        5.962168          3.531623        2.249580
## 2 AE050318   NeutNoGo        5.419626        -10.376971        2.866578
## 3 AE050318      NegGo        3.021852         -6.470334        3.907745
## 4 AE050318     NeutGo        2.577527        -10.326151        3.458130
## 5 AH101121    NegNoGo        3.187342         -4.029610        1.060874
## 6 AH101121      NegGo        2.182505         -7.668577        1.576681
##   Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1          2.023796        2.252998          3.432536             256
## 2         -7.857202        2.118532          7.358546             204
## 3        -10.871784        1.377677          3.505749             220
## 4        -15.687000        1.537678          4.367052             228
## 5        -24.144933        4.750914         14.173249             244
## 6        -24.930473        4.063984         18.547604             232
##   Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1        11.4069294             452         -2.268911             596
## 2        -4.5040871             452         -7.270952             720
## 3        -2.7612018             536         -5.187617             648
## 4        -7.3050607             376        -20.651664             588
## 5        -0.5727539             476        -25.672664             508
## 6        -5.8547045             492        -26.713190             528
##   Peak Amplitude.P3    N2P2avg   N2P2peak calculator_age_cve
## 1          6.870649  -1.507826 -13.675841               38.1
## 2         10.265959   2.519769  -2.766865               38.1
## 3          5.088120  -4.401450  -2.426415               38.1
## 4          7.176728  -5.360848 -13.346603               38.1
## 5         18.808306 -20.115323 -25.099910               70.5
## 6         23.899982 -17.261896 -20.858486               70.5
##   calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1                     0                             1            1.9
## 2                     0                             1            1.9
## 3                     0                             1            1.9
## 4                     0                             1            1.9
## 5                     1                             0             NA
## 6                     1                             0             NA
##   disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1                        12        23                              12
## 2                        12        23                              12
## 3                        12        23                              12
## 4                        12        23                              12
## 5                         0         6                               0
## 6                         0         6                               0
##   talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1                 1           121           126          123               146
## 2                 1           121           126          123               146
## 3                 1           121           126          123               146
## 4                 1           121           126          123               146
## 5                 0           104           111          111               110
## 6                 0           104           111          111               110
##   teld_exp_standard teld_spokenlang_standard cve_comments
## 1               135                      149             
## 2               135                      149             
## 3               135                      149             
## 4               135                      149             
## 5               115                      115             
## 6               115                      115             
##                                                        comments_tasks
## 1                                                                    
## 2                                                                    
## 3                                                                    
## 4                                                                    
## 5 Good data, attentive child, but sleepy during the Reactivity task. 
## 6 Good data, attentive child, but sleepy during the Reactivity task.

FIND THE UNDEFINED (9) TALKER GROUPS AND MANUALLY MARK THEM AS EITHER 1 or 0:

# Check the subject numbers with missing stuttering assessments:
# rows_with_null <- FULL[is.na(FULL$disfluency_sldper100words) | is.na(FULL$ssi_total) | is.na(FULL$ssi_severity), ] 
# | is.na(FULL_2$calculator_talkergroup_parent) makes no difference
# unique(rows_with_null$Subject)

# Show the rows where talkergroup_final = 9 or NA :
# Renamed from "short_RT_rows": that name was misleading here (it is reused
# later for the reaction-time screen on the behavioral data); these rows are
# the participants whose talker group could not be classified automatically.
undefined_talkergroup_rows <- subset(FULL, talkergroup_final == 9 |  is.na(talkergroup_final))
undefined_talkergroup_rows
##        Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 145 LW102219T3      NegGo        4.047862       -11.3533653        1.226660
## 146 LW102219T3    NegNoGo        7.716179       -14.3407325        1.015503
## 147 LW102219T3     NeutGo        3.809460       -16.0253408        3.957634
## 148 LW102219T3   NeutNoGo        7.502369       -13.4832903        2.684751
## 193   SA072518      NegGo        1.044944        -1.8257751        1.919929
## 194   SA072518    NegNoGo        3.253022        -2.8618672        2.448918
## 195   SA072518     NeutGo        3.037777        -1.3041270        2.886428
## 196   SA072518   NeutNoGo        2.757350        -0.2626314        2.512955
##     Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 145         -15.50399       0.9484819         0.7954588             160
## 146         -20.08492       0.9063114         2.8402350             160
## 147         -13.05854       2.4802956        -2.5216476             284
## 148         -16.74960       1.1207562         5.1640531             276
## 193         -10.16839       2.3822622        11.3534178             244
## 194         -18.03140       3.8740673        25.2926602             240
## 195          -8.41987       1.0071346         7.7124837             232
## 196         -10.71480       3.0017872        11.6209099             264
##     Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 145        -6.9007481             488         -16.33392             488
## 146        -5.0478235             392         -20.30981             684
## 147       -21.1105926             352         -18.30229             456
## 148       -23.1688279             352         -22.34015             680
## 193        -0.2973903             516         -12.97347             612
## 194         3.1531559             444         -21.27522             676
## 195         2.5669589             444         -11.77583             492
## 196         5.1291300             440         -14.92553             568
##     Peak Amplitude.P3    N2P2avg    N2P2peak calculator_age_cve
## 145          2.524455  -4.150623  -9.4331692               79.8
## 146          4.480037  -5.744191 -15.2619911               79.8
## 147          2.975747   2.966805   2.8082985               79.8
## 148          6.883948  -3.266312   0.8286815               79.8
## 193         15.744950  -8.342619 -12.6760764               73.0
## 194         31.451418 -15.169536 -24.4283726               73.0
## 195          9.651857  -7.115743 -14.3427915               73.0
## 196         16.708875 -10.452173 -20.0546604               73.0
##     calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 145                     1                            NA             NA
## 146                     1                            NA             NA
## 147                     1                            NA             NA
## 148                     1                            NA             NA
## 193                     1                            NA             NA
## 194                     1                            NA             NA
## 195                     1                            NA             NA
## 196                     1                            NA             NA
##     disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 145                        NA        NA                               0
## 146                        NA        NA                               0
## 147                        NA        NA                               0
## 148                        NA        NA                               0
## 193                        NA        NA                              NA
## 194                        NA        NA                              NA
## 195                        NA        NA                              NA
## 196                        NA        NA                              NA
##     talkergroup_final gfta_standard ppvt_standard evt_standard
## 145                NA            NA            NA           NA
## 146                NA            NA            NA           NA
## 147                NA            NA            NA           NA
## 148                NA            NA            NA           NA
## 193                NA            NA            NA           NA
## 194                NA            NA            NA           NA
## 195                NA            NA            NA           NA
## 196                NA            NA            NA           NA
##     teld_rec_standard teld_exp_standard teld_spokenlang_standard
## 145                NA                NA                       NA
## 146                NA                NA                       NA
## 147                NA                NA                       NA
## 148                NA                NA                       NA
## 193                NA                NA                       NA
## 194                NA                NA                       NA
## 195                NA                NA                       NA
## 196                NA                NA                       NA
##                                                 cve_comments
## 145 Did fluency count abs CELF screener completed at visit. 
## 146 Did fluency count abs CELF screener completed at visit. 
## 147 Did fluency count abs CELF screener completed at visit. 
## 148 Did fluency count abs CELF screener completed at visit. 
## 193                                                         
## 194                                                         
## 195                                                         
## 196                                                         
##                                                                                                                                                                                                                                                             comments_tasks
## 145 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50. 
## 146 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50. 
## 147 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50. 
## 148 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50. 
## 193                                                                                                                                                                                                                                                                       
## 194                                                                                                                                                                                                                                                                       
## 195                                                                                                                                                                                                                                                                       
## 196
# MANUALLY LABEL THE TALKERGROUP FOR NA's:
# LW102219T3 == 0 because there is a record of a disfluency count of 0, although no parent report is available. 
# SA072518 == 0 because there is no indication in RedCap that the child stutters.

# Replace NA values in a specific column based on a condition:
# Both manually-reviewed subjects are classified as 0 (CWNS), so a single
# vectorized assignment covers them. %in% never yields NA, and rows that do
# not match the condition keep their existing value.
manually_classified <- c("LW102219T3", "SA072518")
FULL$talkergroup_final <- ifelse(
  FULL$Subject %in% manually_classified & is.na(FULL$talkergroup_final),
  0,
  FULL$talkergroup_final
)

# Making sure no 9 or NA remained:
any(FULL$talkergroup_final == 9 | is.na(FULL$talkergroup_final))
## [1] FALSE

YOU CAN CREATE A NEW SUBSET WITH ONLY GOOD DATA HERE:

# Create a new subset with ONLY good data, by removing those participants you identified as having bad data:
bad_data_subjects <- c("AE050318", "AL090917", "ES031519", "LS100617",
                       "LG100721", "MS102319", "PB021519", "RT032219")
combo_good <- dplyr::filter(combo, !(Subject %in% bad_data_subjects))

head(combo_good)
##    Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AH101121      NegGo        2.182505         -7.668577        1.576681
## 2 AH101121    NegNoGo        3.187342         -4.029610        1.060874
## 3 AH101121     NeutGo        1.839147         -4.235501        2.891194
## 4 AH101121   NeutNoGo        3.776016          0.651977        2.494542
## 5 AK022218      NegGo        4.825410          8.927914        5.937896
## 6 AK022218    NegNoGo        3.586909          5.862981        5.437614
##   Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1        -24.930473        4.063984          18.54760             232
## 2        -24.144933        4.750914          14.17325             244
## 3        -20.625652        4.391747          16.31334             224
## 4        -15.905247        5.252118          10.57876             236
## 5         -7.418664        1.763918          16.08891             204
## 6        -10.567050        6.287977          23.56859             204
##   Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1        -5.8547045             492         -26.71319             528
## 2        -0.5727539             476         -25.67266             508
## 3        -1.3806822             540         -23.88108             508
## 4         6.3521030             452         -19.63251             588
## 5        16.4295335             488         -14.04309             648
## 6        11.5147669             452         -19.36042             628
##   Peak Amplitude.P3   N2P2avg  N2P2peak
## 1          23.89998 -17.26190 -20.85849
## 2          18.80831 -20.11532 -25.09991
## 3          21.79867 -16.39015 -22.50040
## 4          15.80878 -16.55722 -25.98461
## 5          19.63726 -16.34658 -30.47263
## 6          36.67178 -16.43003 -30.87518

LOAD AND MERGE WITH BEHAVIORAL DATA:

# Load the file:
accuracy <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/Merged_Zoo_05.18.23.csv')
# Take only the relevant variables:
accuracy <-  subset(accuracy, select=c('Name','VisitNumber','ShowStim.ACC','ShowStim.RESP','ShowStim.RT','StimTag'))

# Check out the class types for each variable. 
sapply(accuracy, class)
##          Name   VisitNumber  ShowStim.ACC ShowStim.RESP   ShowStim.RT 
##   "character"     "integer"     "integer"   "character"     "integer" 
##       StimTag 
##   "character"
# For ShowStim.RESP response 4 is a "character", not integer. 
print(class(accuracy$ShowStim.RESP))
## [1] "character"
# Convert character 4 for ShowStim.RESP to integer
# (left disabled: the scoring below compares against the character '4', so
# the conversion is unnecessary)
# accuracy$ShowStim.RESP <- as.integer(accuracy$ShowStim.RESP)

# Keep only the four Go/NoGo stimulus conditions and score each trial.
# ShowStim.RESP == '4' means the child pushed the button; NA means no response.
# A press is correct on Go trials and incorrect on NoGo trials, so
# accuracy = 1 exactly when pressed differs from is_nogo (an XOR).
accuracy <- accuracy %>%
  filter(StimTag %in% c('negN', 'neuN', 'negG', 'neuG')) %>%
  mutate(
    pressed = !is.na(ShowStim.RESP) & ShowStim.RESP == '4',
    is_nogo = StimTag %in% c('negN', 'neuN'),
    accuracy = as.numeric(pressed != is_nogo)
  ) %>%
  select(-pressed, -is_nogo)

####YOU MIGHT DECIDE TO REMOVE ALL TRIALS WITH RT SHORTER THAN 148MS FOR THE ACCURACY CALCULATION#####

# Display the short RT rows and how many of them there are:
# FIX: rows with NA reaction times previously produced all-NA phantom rows
# (logical subscripting with `[` keeps rows where the condition is NA, as the
# <NA> rows in the printed head() show) and inflated the row count below.
# Exclude the NA reaction times explicitly before the range comparison.
short_RT_rows <- accuracy[!is.na(accuracy$ShowStim.RT) &
                            accuracy$ShowStim.RT < 148 &
                            accuracy$ShowStim.RT > 1, ]
head(short_RT_rows)
##          Name VisitNumber ShowStim.ACC ShowStim.RESP ShowStim.RT StimTag
## 636  AO013020           1            0             4         117    neuN
## NA       <NA>          NA           NA          <NA>          NA    <NA>
## 2002 WF080417           1            0             4          59    neuN
## 2004 WF080417           1            1             4          75    neuG
## 2006 WF080417           1            0             4         136    neuN
## NA.1     <NA>          NA           NA          <NA>          NA    <NA>
##      accuracy
## 636         0
## NA         NA
## 2002        0
## 2004        1
## 2006        0
## NA.1       NA
print(nrow(short_RT_rows)) 
## [1] 587
# describe(short_RT_rows$StimTag)
# THERE ARE 587 ROWS WITH RT LOWER THAN 148ms but ALL of them are NOGO trials anyways, so they are incorrect anyways. 
# NO NEED TO CHANGE THE ACCURACY MEASURE ABOVE because Low RT is relevant only for Go trials. 

# Create a subset of whole data set by excluding the very short RT trials.
# accuracy_filtered <- accuracy[!(accuracy$ShowStim.RT < 148 & accuracy$ShowStim.RT > 1), ]
# discrepancies <- accuracy$accuracy != accuracy$accuracy_filtered
# discrepancy_rows <- which(discrepancies)
# print(discrepancy_rows)


# Create ACCURACY PERCENTAGE column: one row per subject x visit x condition.
# .groups = "drop" returns an ungrouped tibble and silences the
# "`summarise()` has grouped output" message.
accuracy_percent <- accuracy %>%
  group_by(Name, VisitNumber, StimTag) %>%
  dplyr::summarize(accuracy_percentage = mean(accuracy) * 100, .groups = "drop")

# Calculate mean REACTION TIME for correct GO trials only (RT is not
# meaningful for NoGo, where a correct response is no button press):
reaction_time <- accuracy %>%
  filter(accuracy == 1 & StimTag %in% c('negG', 'neuG')) %>%
  group_by(Name, VisitNumber, StimTag) %>%
  dplyr::summarize(reaction_time = mean(ShowStim.RT), .groups = "drop")

# COMBINE accuracy_percent and reaction_time; full_join keeps the NoGo rows,
# which get NA reaction_time (visible in the final ZOO output further below).
eprime <- full_join(accuracy_percent, reaction_time, by = c("Name", "VisitNumber", "StimTag"))

# Build the final eprime table in one pipeline:
#  - ungroup first so the grouping keys (Name, VisitNumber) can be dropped at
#    the end (select on a grouped df would silently keep them),
#  - combine visit number with subject name into a Subject ID matching FULL
#    (visit 1 keeps the bare name; later visits get a "T<visit>" suffix),
#  - relabel StimTag values to the condition names used in FULL,
#  - drop the now-redundant Name / VisitNumber columns.
eprime <- eprime %>%
  ungroup() %>%
  mutate(Subject = ifelse(VisitNumber != 1, paste0(Name, "T", VisitNumber), Name)) %>%
  mutate(StimTag = recode(StimTag, "negG" = "NegGo", "negN" = "NegNoGo", "neuG" = "NeutGo", "neuN" = "NeutNoGo")) %>%
  dplyr::select(-Name, -VisitNumber)

# FULL uses "Trial Type" for the condition column; rename it to StimTag so the
# merge key matches the eprime data:
FULL <- FULL %>%
  dplyr::rename(StimTag = `Trial Type`)

# COMBINE ALL!!
# merge() with by = c("Subject", "StimTag") performs an inner join by default:
# subject/condition rows present in only one of FULL or eprime are silently
# dropped here — TODO confirm that is the intended behavior.
ZOO <- merge(FULL, eprime, by=c("Subject", "StimTag"))
head(ZOO)
##    Subject  StimTag Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318    NegGo        3.021852         -6.470334        3.907745
## 2 AE050318  NegNoGo        5.962168          3.531623        2.249580
## 3 AE050318   NeutGo        2.577527        -10.326151        3.458130
## 4 AE050318 NeutNoGo        5.419626        -10.376971        2.866578
## 5 AH101121    NegGo        2.182505         -7.668577        1.576681
## 6 AH101121  NegNoGo        3.187342         -4.029610        1.060874
##   Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1        -10.871784        1.377677          3.505749             220
## 2          2.023796        2.252998          3.432536             256
## 3        -15.687000        1.537678          4.367052             228
## 4         -7.857202        2.118532          7.358546             204
## 5        -24.930473        4.063984         18.547604             232
## 6        -24.144933        4.750914         14.173249             244
##   Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1        -2.7612018             536         -5.187617             648
## 2        11.4069294             452         -2.268911             596
## 3        -7.3050607             376        -20.651664             588
## 4        -4.5040871             452         -7.270952             720
## 5        -5.8547045             492        -26.713190             528
## 6        -0.5727539             476        -25.672664             508
##   Peak Amplitude.P3    N2P2avg   N2P2peak calculator_age_cve
## 1          5.088120  -4.401450  -2.426415               38.1
## 2          6.870649  -1.507826 -13.675841               38.1
## 3          7.176728  -5.360848 -13.346603               38.1
## 4         10.265959   2.519769  -2.766865               38.1
## 5         23.899982 -17.261896 -20.858486               70.5
## 6         18.808306 -20.115323 -25.099910               70.5
##   calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1                     0                             1            1.9
## 2                     0                             1            1.9
## 3                     0                             1            1.9
## 4                     0                             1            1.9
## 5                     1                             0             NA
## 6                     1                             0             NA
##   disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1                        12        23                              12
## 2                        12        23                              12
## 3                        12        23                              12
## 4                        12        23                              12
## 5                         0         6                               0
## 6                         0         6                               0
##   talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1                 1           121           126          123               146
## 2                 1           121           126          123               146
## 3                 1           121           126          123               146
## 4                 1           121           126          123               146
## 5                 0           104           111          111               110
## 6                 0           104           111          111               110
##   teld_exp_standard teld_spokenlang_standard cve_comments
## 1               135                      149             
## 2               135                      149             
## 3               135                      149             
## 4               135                      149             
## 5               115                      115             
## 6               115                      115             
##                                                        comments_tasks
## 1                                                                    
## 2                                                                    
## 3                                                                    
## 4                                                                    
## 5 Good data, attentive child, but sleepy during the Reactivity task. 
## 6 Good data, attentive child, but sleepy during the Reactivity task. 
##   accuracy_percentage reaction_time
## 1            84.16667      706.0297
## 2            57.50000            NA
## 3            90.83333      796.2385
## 4            72.50000            NA
## 5            99.16667      643.1176
## 6            87.50000            NA
# Save the merged dataset. NOTE(review): write.csv adds a row-number column
# unless row.names = FALSE is supplied — downstream readers of ZOO.csv will
# see that extra unnamed first column.
write.csv(ZOO, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv")

YOU CAN CREATE A NEW SUBSET WITH ONLY HIGHER BEHAVIORAL ACCURACY:

# ZOO <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv')

# Keep rows with behavioral accuracy above 75%:
ZOO_good <- subset(ZOO, accuracy_percentage > 75)
# Rows excluded from ZOO_good, i.e. accuracy <= 75. The original `< 75` missed
# rows at exactly 75%, so ZOO_good and filtered_rows did not partition ZOO;
# subset() also avoids the NA-row injection that logical [ indexing produces
# for NA values.
filtered_rows <- subset(ZOO, accuracy_percentage <= 75)

nrow(ZOO)
## [1] 232
nrow(ZOO_good)
## [1] 163